//
// windowspill.S  --  register window spill routine
//
// $Id: //depot/rel/Foxhill/dot.8/Xtensa/OS/hal/windowspill_asm.S#1 $

// Copyright (c) 1999-2010 Tensilica Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#include <xtensa/coreasm.h>


//  xthal_window_spill_nw
//
//  Spill live register windows to the stack.
//
//  Required entry conditions:
//	PS.WOE = 0
//	PS.INTLEVEL >= XCHAL_EXCM_LEVEL
//	a1 = valid stack pointer (note: some regs may be spilled at a1-16)
//	a0 = return PC (usually set by call0 or callx0 when calling this function)
//	a2,a3 undefined
//	a4 thru a15 valid, if they are part of window(s) to be spilled
//     (Current window a0..a15 saved if necessary.)
//	WINDOWSTART[WINDOWBASE] = 1
//
//  Exit conditions:
//	PS.WOE, PS.INTLEVEL = same as on entry
//	WINDOWBASE = same as on entry
//	WINDOWSTART updated to reflect spilled windows
//		(equals 1<<WINDOWBASE if successful return)
//	a0 = return PC
//	a1 = same as on entry
//	a2 = error code:
//		0 --> successful
//			(WINDOWSTART = 1<<WINDOWBASE)
//		1 --> invalid WINDOWSTART (WINDOWBASE bit not set)
//			(WINDOWSTART unchanged)
//		2 --> invalid window size (not 4, 8 or 12 regs)
//			(WINDOWSTART bits of successfully spilled
//			 windows are cleared, others left intact)
//	a3 clobbered
//	a4,a5,a8,a9,a12,a13 = same as on entry
//	a6,a7,a10,a11,a14,a15 clobbered if they were part of window(s)
//		to be spilled, otherwise they are the same as on entry
//	loop registers (LCOUNT,LBEG,LEND) are NOT affected (they were in earlier versions)
//	SAR clobbered
//
//  All non-spilled register windows will be spilled.
//  Beware that this may include a4..a15 of the current window,
//  so generally these should not have been clobbered by the
//  caller if it is at all possible that these registers
//  are part of an unspilled window (it often is possible)
//  (otherwise the spilled stack would be invalid).
//
//  THIS MEANS: the caller is responsible for saving a0-a15 but
//  the caller must leave a4-a15 intact when control is transferred
//  here.
//
//  It may be reentrant (but stack pointer is invalid during
//  execution due to window rotations, so can't take interrupts
//  and exceptions in the usual manner, so ... what does
//  reentrancy really mean here?).


	//  The xthal_spill_registers_into_stack_nw entry point
	//  is kept here only for backwards compatibility.
	//  It will be removed in the very near future.
	.global	xthal_spill_registers_into_stack_nw

	.text
	.align 4
	.global	xthal_window_spill_nw
	.type	xthal_window_spill_nw,@function
xthal_window_spill_nw:
xthal_spill_registers_into_stack_nw:	// BACKWARD COMPATIBILITY ONLY - see above

#if ! XCHAL_HAVE_WINDOWED
	//  Nothing to do -- window option was not selected.
	movi	a2, 0		// always report success
	ret
#else /* XCHAL_HAVE_WINDOWED */
#define WSBITS	(XCHAL_NUM_AREGS / 4)		/* width of WINDOWSTART register in bits */
#define WBBITS	(XCHAL_NUM_AREGS_LOG2 - 2)	/* width of WINDOWBASE register in bits */
	/*
	 * Rearrange (rotate) window start bits relative to the current
	 * window (WINDOWBASE).  WINDOWSTART currently looks like this:
	 *
	 *          a15-a0
	 * NAREG-1   |  |    0
	 *    |      vvvv    |
	 *    xxxxxxxxxx1yyyyy
	 *              ^
	 *              |
	 *              WINDOWBASE
	 *
	 * The start bit pointed to by WINDOWBASE must be set
	 * (we return an error if it isn't), as it corresponds
	 * to the start of the current window (shown as a0-a15).
	 *
	 * We want the window start bits rotated to look like this:
	 *              1yyyyyxxxxxxxxxx
	 *
	 * That is, WINDOWSTART rotated right by (WINDOWBASE + 1), so that
	 * the current window's start bit ends up at bit WSBITS-1 and the
	 * bit for window WINDOWBASE+1 ends up at bit 0.
	 *
	 * Note that there is one start bit for every four registers;
	 * and the total number of registers (NAREG) can be 32 or 64;
	 * so the number of start bits in WINDOWSTART is NAREG/4,
	 * and the size of WINDOWSTART can be 8 or 16.
	 */

	rsr.windowbase	a2
	addi	a2, a2, 1
	ssr	a2		// sar = WINDOWBASE + 1
	rsr.windowstart	a3
	srl	a2, a3		// a2 is 0... | 000000xxxxxxxxxx = WINDOWSTART >> sar
	sll	a3, a3		// a3 is 1yyyyy0000000000 | 0... = WINDOWSTART << (32 - sar)
	bgez	a3, .Linvalid_ws	// verify that msbit is indeed set

	srli	a3, a3, 32-WSBITS	// a3 is 0... | 1yyyyy0000000000 = a3 >> (32-NAREG/4)
	or	a2, a2, a3		// a2 is 0... | 1yyyyyxxxxxxxxxx

	/*
	 *	FIND THE FIRST ONE
	 *
	 *  Now we have (in a2) the window start bits rotated in order
	 *  from oldest (closest to lsbit) to current (msbit set).
	 *  Each start bit (that is set), other than the current one,
	 *  corresponds to a window frame to spill.
	 *
	 *  Now find the first start bit, ie. the first frame to spill,
	 *  by looking for the first bit set in a2 (from lsbit side).
	 */

#if XCHAL_HAVE_NSA
	neg	a3, a2		// keep only the least-significant bit set of a2 ...
	and	a3, a3, a2	// ... in a3
	nsau	a3, a3		// get index of that bit, numbered from msbit (32 if absent)
	ssl	a3		// set sar = 32 - a3 = bit index numbered from lsbit + 1
#else /* XCHAL_HAVE_NSA */
	wsr.windowstart	a2	// temporarily save rotated start bits
				// (we can use WINDOWSTART because WOE=0)

	//  NOTE:  this could be optimized a bit, by explicit coding rather than the macro.
	find_ls_one	a3, a2	// set a3 to index of lsmost bit set in a2 (a2 clobbered)

	addi	a2, a3, 1	// index+1
	ssr	a2		// set sar = index + 1
	rsr.windowstart	a2	// restore a2 (rotated start bits)
#endif /* XCHAL_HAVE_NSA */
	srl	a2, a2		// right-justify the rotated start bits (dropping lsbit set)
	wsr.windowstart	a2	// save rotated + justified window start bits,
				//  because a2 will disappear when modifying WINDOWBASE
				// again, we can use WINDOWSTART because WOE=0

	/*
	 *  Rotate WindowBase so that a0 of the next window to spill is in a4
	 *  (ie. leaving us with a2 and a3 to play with, because a0 and a1
	 *  may be those of the original window which we must preserve).
	 */
	rsr.windowbase	a2
#if XCHAL_HAVE_NSA
	addi	a2, a2, 31
	sub	a3, a2, a3	// a3 = WINDOWBASE + index = WINDOWBASE + (31 - msbit_index)
#else /* XCHAL_HAVE_NSA */
	add	a3, a2, a3	// a3 = WINDOWBASE + index
#endif /* XCHAL_HAVE_NSA */
	wsr.windowbase	a3	// effectively do:  rotw index
	rsync			// wait for write to WINDOWBASE to complete
	//  Now our registers have changed!

	rsr.windowstart	a2	// restore a2 (rotated + justified window start bits)

	/*
	 *  We are now ready to start the window spill loop.
	 *  Relative to the above, a2 and WINDOWBASE are now as follows:
	 *
	 *        1yyyyyxxxxxxxxxx = rotated start bits as shown above
	 *        1yyyyyxxxx100000 = actual rotated start bits (example)
	 *  0000001yyyyyxxxx ^     = a2 = rotated + justified start bits
	 *        ^      xxx1^     = window being spilled
	 *        ^          ^
	 *        |          |
	 *    original    current
	 *   WINDOWBASE  WINDOWBASE
	 *
	 *  The first window to spill (save) starts at what is now a4.
	 *  The spill loop maintains the adjusted start bits in a2,
	 *  shifting them right as each window is spilled.
	 *
	 *  Loop invariant at .Lspill_loop:
	 *	a4..    = registers of the frame to spill (call[i]), a4 being its a0
	 *	a2      = remaining start bits; bit 0 is the start bit of the
	 *		  window 4 registers after call[i]'s a0 (ie. at a8)
	 *	a3      = scratch
	 */

.Lspill_loop:
	//  Top of save loop.
	//  Find the size of this call and branch to the appropriate save routine.

	beqz	a2, .Ldone		// if no start bit remaining, we're done
	bbsi.l	a2, 0, .Lspill4		// if next start bit is set, it's a call4
	bbsi.l	a2, 1, .Lspill8		// if 2nd next bit set, it's a call8
	bbsi.l	a2, 2, .Lspill12	// if 3rd next bit set, it's a call12
	j	.Linvalid_window	// else it's an invalid window!



	// SAVE A CALL4
.Lspill4:
	addi	a3, a9, -16	// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0	// store call[i]'s a0
	s32i	a5, a3, 4	// store call[i]'s a1
	s32i	a6, a3, 8	// store call[i]'s a2
	s32i	a7, a3, 12	// store call[i]'s a3

	srli	a6, a2, 1	// move and shift the start bits (a6 becomes a2 after rotw)
	rotw	1		// rotate the window

	j	.Lspill_loop

	// SAVE A CALL8
.Lspill8:
	addi	a3, a13, -16	// a3 gets call[i+1]'s sp - 16
	s32i	a4, a3, 0	// store call[i]'s a0
	s32i	a5, a3, 4	// store call[i]'s a1
	s32i	a6, a3, 8	// store call[i]'s a2
	s32i	a7, a3, 12	// store call[i]'s a3

	addi	a3, a5, -12	// call[i-1]'s sp address
	l32i	a3, a3, 0	// a3 is call[i-1]'s sp
			// (load slot)
	addi	a3, a3, -32	// a3 points to our spill area

	s32i	a8, a3, 0	// store call[i]'s a4
	s32i	a9, a3, 4	// store call[i]'s a5
	s32i	a10, a3, 8	// store call[i]'s a6
	s32i	a11, a3, 12	// store call[i]'s a7

	srli	a10, a2, 2	// move and shift the start bits (a10 becomes a2 after rotw)
	rotw	2		// rotate the window

	j	.Lspill_loop

	// SAVE A CALL12
.Lspill12:
	rotw	1		// rotate to see call[i+1]'s sp
				// (call[i]'s a0..a11 are now our a0..a11)

	addi	a13, a13, -16	// set to the reg save area
	s32i	a0, a13, 0	// store call[i]'s a0
	s32i	a1, a13, 4	// store call[i]'s a1
	s32i	a2, a13, 8	// store call[i]'s a2
	s32i	a3, a13, 12	// store call[i]'s a3

	addi	a3, a1, -12	// call[i-1]'s sp address
	l32i	a3, a3, 0	// a3 has call[i-1]'s sp
	addi	a13, a13, 16	// restore call[i+1]'s sp (here to fill load slot)
	addi	a3, a3, -48	// a3 points to our save area

	s32i	a4, a3, 0	// store call[i]'s a4
	s32i	a5, a3, 4	// store call[i]'s a5
	s32i	a6, a3, 8	// store call[i]'s a6
	s32i	a7, a3, 12	// store call[i]'s a7
	s32i	a8, a3, 16	// store call[i]'s a8
	s32i	a9, a3, 20	// store call[i]'s a9
	s32i	a10, a3, 24	// store call[i]'s a10
	s32i	a11, a3, 28	// store call[i]'s a11

	rotw	-1		// rotate to see start bits (a2)
	srli	a14, a2, 3	// move and shift the start bits (a14 becomes a2 after rotw)
	rotw	3		// rotate to next window

	j	.Lspill_loop



.Ldone:
	rotw	1		// back to the original window
	rsr.windowbase	a2	// get (original) window base
	ssl	a2		// setup for shift left by WINDOWBASE
	movi	a2, 1
	sll	a2, a2		// compute new WINDOWSTART = 1<<WINDOWBASE
	wsr.windowstart	a2	// and apply it
	rsync
	movi	a2, 0		// done!
	ret
	//jx	a0


	//  Invalid WINDOWSTART register.
	//
.Linvalid_ws:
	movi	a2, 1		// indicate invalid WINDOWSTART
	ret			// return from subroutine


	//  Invalid window size!
	//  The three bits following the start bit are all clear, so
	//  we have an invalid window state (can't determine a window size).
	//
	//  So we exit with an error, but to do that we must first restore
	//  the original WINDOWBASE.  We also compute a sensible
	//  WINDOWSTART that has the start bits of spilled windows
	//  cleared, but all other start bits intact, so someone debugging
	//  the failure can look at WINDOWSTART to see which window
	//  failed to spill.
	//
.Linvalid_window:
	slli	a2, a2, 1	// space for missing start bit
	addi	a2, a2, 1	// add missing start bit
	rsr.windowbase	a3	// get current WINDOWBASE
	bbsi.l	a2, WSBITS-1, 2f	// branch if current WINDOWBASE==original
1:	addi	a3, a3, -1	// decrement towards original WINDOWBASE
	slli	a2, a2, 1	// shift towards original WINDOWSTART alignment
	bbci.l	a2, WSBITS-1, 1b	// repeat until ms start bit set
	extui	a3, a3, 0, WBBITS	// mask out upper base bits, in case of carry-over
2:	//  Here, a3 = original WINDOWBASE;
	//  and msbit of start bits in a2 is set, and no other bits above it.
	//
	//  That msbit (bit WSBITS-1) is the start bit of the original window,
	//  so a2 is laid out exactly like the rotated start bits computed on
	//  entry:  WINDOWSTART rotated right by (original WINDOWBASE + 1).
	//  Undo that by rotating left by the same amount, (WINDOWBASE + 1),
	//  so that the original window's start bit lands back on bit
	//  WINDOWBASE.  (Rotating by only WINDOWBASE would leave every start
	//  bit one position too low and WINDOWSTART[WINDOWBASE] clear.)
	addi	a3, a3, 1	// rotation amount = orig WB + 1  (1 .. WSBITS)
	ssl	a3		// set shift left ... (sar = 32 - (orig WB + 1))
	slli	a3, a2, 32-WSBITS	// left-justify start bits
	src	a2, a2, a3	// rotate left by (original WINDOWBASE + 1)
	extui	a2, a2, 0, WSBITS	// keep only significant start bits
	wsr.windowstart	a2	// we've cleared only start bits of spilled windows
	rsr.sar	a3		// retrieve 32 - (original WINDOWBASE + 1)
	movi	a2, 31
	sub	a3, a2, a3	// a3 = 31 - sar = original WINDOWBASE
	wsr.windowbase	a3	// back to original WINDOWBASE
	rsync

	movi	a2, 2		// indicate invalid window size
	ret

#endif /* XCHAL_HAVE_WINDOWED */

	.size	xthal_window_spill_nw, . - xthal_window_spill_nw


//  void  xthal_window_spill (void);
//
//  Spill live register windows to the stack.
//
//  This will spill all register windows except this
//  function's window, and possibly that of its caller.
//  (Currently, the caller's window is spilled and reloaded
//   when this function returns.  This may change with
//   future optimisations.)
//
//  Another, simpler way to implement this might be
//  to use an appropriate sequence of call/entry/retw
//  instructions to force overflow of any live windows.
//
//  Assumes that PS.INTLEVEL=0 and PS.WOE=1 on entry/exit.
//
	.text
	.align 4
	.global	xthal_window_spill
	.type	xthal_window_spill,@function
xthal_window_spill:
	abi_entry
#if XCHAL_HAVE_WINDOWED
	//  Register roles across the call0 below (xthal_window_spill_nw
	//  documents a4 and a5 as unchanged on exit):
	//	a4 = our own return address (call0 overwrites a0)
	//	a5 = PS exactly as found on entry
	//	a2 = PS value handed to the non-windowed spill routine
	rsr.ps	a5
	mov	a4, a0

	//  Derive the PS that xthal_window_spill_nw requires:
	//  same as the entry PS but with WOE = 0 and
	//  INTLEVEL = XCHAL_EXCM_LEVEL.
	//  (These first writes to a4..a7 happen before WOE is cleared, so any
	//   window still occupying a4..a7 gets spilled before we clobber it.)
	movi	a6, ~(PS_WOE_MASK|PS_INTLEVEL_MASK)
	and	a2, a5, a6			// WOE and INTLEVEL fields now zero
	addi	a2, a2, XCHAL_EXCM_LEVEL	// fill in INTLEVEL = XCHAL_EXCM_LEVEL
	wsr.ps	a2
	rsync					// let the PS write take effect

	call0	xthal_window_spill_nw		// error code left in a2 is not checked

	//  Put back what we changed:  return address first, then PS.
	mov	a0, a4
	wsr.ps	a5
	rsync
#endif /* XCHAL_HAVE_WINDOWED */
	abi_return

	.size	xthal_window_spill, . - xthal_window_spill

